# Fetch the Baidu Tieba forum category list and store it in MongoDB.
import datetime
import time
import csv
from crawler import download
from bs4 import BeautifulSoup
import json
from pymongo import MongoClient
import urllib.parse
appkey=22080  # NOTE(review): unused in this chunk — presumably consumed elsewhere; confirm before removing

url='http://tieba.baidu.com/f/index/forumclass'  # forum-class index page to scrape
baseurl='http://tieba.baidu.com'  # site root; unused in this chunk


# MongoDB connection used to persist the scraped categories.
client=MongoClient('localhost',27017)
db=client.cache  # database named "cache"; collection is selected below at write time

rs={}  # leftover accumulator dict; no longer populated
headers={}
headers['User-agent'] = 'wzwp'  # custom User-Agent passed through to download()

if __name__ == '__main__':
    # Download the category index page; num_retries=-1 is whatever retry
    # semantics crawler.download defines for a negative count — unchanged here.
    html = download(url, headers=headers, proxy=None, num_retries=-1)
    soup = BeautifulSoup(html, 'html.parser')

    # Each <ul class="item-list-ul clearfix"> holds the category <li> entries.
    category_lists = soup.find_all(attrs={'class': 'item-list-ul clearfix'})
    for category_list in category_lists:
        for item in category_list:
            # The first child of each <li> is the <a> tag carrying the link.
            href = item.next.attrs['href']
            name = item.text
            # Upsert keyed on the relative href so re-runs refresh the name
            # in place instead of inserting duplicates.  update_one replaces
            # the deprecated Collection.update (removed in PyMongo 4.x).
            db.tiabamulu.update_one({'_id': href},
                                    {'$set': {'name': name}},
                                    upsert=True)
    print("Complete!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")